library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.5 ✓ dplyr 1.0.3
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(nycflights13)
# Read the raw Global Terrorism Database extract from the Excel workbook.
gtd_data <- read_excel("globalterrorismdatabase_final.xlsx")
gtd_data
## gtd_data is the raw data
# Number of rows (incidents) and columns (variables).
dim(gtd_data)
## [1] 201183 16
The data set gtd_data has 201,183 rows and 16 columns; each row describes one terrorism incident. In total the data cover 201,183 incidents recorded in 49 distinct calendar years from 1970 onward.
class(gtd_data)
## [1] "tbl_df" "tbl" "data.frame"
## The dataset gtd_data is a tibble, which is also a data frame.
is.data.frame(gtd_data)
## [1] TRUE
# Round trip for illustration: strip the tibble classes, then restore them.
# is_tibble()/as_tibble() replace is.tibble()/as.tibble(), which were
# deprecated in tibble 2.0.0.
gtd <- as.data.frame(gtd_data)
is_tibble(gtd)
## [1] FALSE
## The plain data-frame copy gtd is not a tibble (gtd_data itself still is).
gtd <- as_tibble(gtd)
is_tibble(gtd)
## [1] TRUE
## Now the data set gtd has been transformed into a tibble.
The GTD is an open-source database produced by the National Consortium for the Study of Terrorism and Responses to Terrorism (START), based in the University of Maryland. The GTD provides information on domestic and international terrorist attacks around the world since 1970, and now includes more than 200,000 events. For each event, a wide range of information is available, including the date and location of the incident, the weapons used, nature of the target, the number of casualties, and – when identifiable – the group or individual responsible.
The variables in our final data set, after tidying, are:
First, we used rename() to make some of the variable/column names shorter and simpler.
library(dplyr)
# Shorten the raw GTD column names in one rename() call (new_name = old_name).
gtd <- gtd_data %>%
  rename(
    year = iyear,
    country = country_txt,
    region = region_txt,
    attack = attacktype_txt,
    targettype = targtype1,
    target = targtype1_txt,
    group = gname,
    weapontype = weaptype1,
    weapon = weaptype1_txt,
    killed = nkill,
    wounded = nwound
  )
## gtd is final data set for our use
Second, we looked at the levels of categorical variables to see if we could tidy some of the entries.
library(forcats)
# List the distinct attack-type labels in sorted order.
gtd$attack %>%
  factor() %>%
  levels()
## [1] "Armed Assault" "Assassination"
## [3] "Bombing/Explosion" "Facility/Infrastructure Attack"
## [5] "Hijacking" "Hostage Taking (Barricade Incident)"
## [7] "Hostage Taking (Kidnapping)" "Unarmed Assault"
## [9] "Unknown"
The variable attack has 9 levels. Since hostage taking is used twice to describe an attack type, we are going to change the entries. We are going to change “Hostage Taking (Kidnapping)” into “H-Kidnapping” and change “Hostage Taking (Barricade Incident)” into “H-Barricade” to avoid confusion.
# Relabel the two hostage-taking categories in the attack column only.
# Comparing the whole data frame against a label scans every cell of every
# column and could silently rewrite a matching value in an unrelated column.
gtd <- gtd %>%
  mutate(
    attack = recode(
      attack,
      "Hostage Taking (Kidnapping)" = "H-Kidnapping",
      "Hostage Taking (Barricade Incident)" = "H-Barricade"
    )
  )
## check the levels again
levels(factor(gtd$attack))
## [1] "Armed Assault" "Assassination"
## [3] "Bombing/Explosion" "Facility/Infrastructure Attack"
## [5] "H-Barricade" "H-Kidnapping"
## [7] "Hijacking" "Unarmed Assault"
## [9] "Unknown"
## entries successfully changed
levels(factor(gtd$target))
## [1] "Abortion Related" "Airports & Aircraft"
## [3] "Business" "Educational Institution"
## [5] "Food or Water Supply" "Government (Diplomatic)"
## [7] "Government (General)" "Journalists & Media"
## [9] "Maritime" "Military"
## [11] "NGO" "Other"
## [13] "Police" "Private Citizens & Property"
## [15] "Religious Figures/Institutions" "Telecommunication"
## [17] "Terrorists/Non-State Militia" "Tourists"
## [19] "Transportation" "Unknown"
## [21] "Utilities" "Violent Political Party"
## target has 22 levels
The variable target has 22 levels. Since "Government" is used twice to describe a target type, we are going to change the entries. We are going to change “Government (Diplomatic)” to “G-Diplomatic” and change “Government (General)” to “G-General” to avoid confusion.
# Relabel the two government categories in the target column only, rather
# than comparing every cell of the data frame against the label.
gtd <- gtd %>%
  mutate(
    target = recode(
      target,
      "Government (Diplomatic)" = "G-Diplomatic",
      "Government (General)" = "G-General"
    )
  )
## check the levels again
levels(factor(gtd$target))
## [1] "Abortion Related" "Airports & Aircraft"
## [3] "Business" "Educational Institution"
## [5] "Food or Water Supply" "G-Diplomatic"
## [7] "G-General" "Journalists & Media"
## [9] "Maritime" "Military"
## [11] "NGO" "Other"
## [13] "Police" "Private Citizens & Property"
## [15] "Religious Figures/Institutions" "Telecommunication"
## [17] "Terrorists/Non-State Militia" "Tourists"
## [19] "Transportation" "Unknown"
## [21] "Utilities" "Violent Political Party"
## entries successfully changed
levels(factor(gtd$weapon))
## [1] "Biological"
## [2] "Chemical"
## [3] "Explosives"
## [4] "Fake Weapons"
## [5] "Firearms"
## [6] "Incendiary"
## [7] "Melee"
## [8] "Other"
## [9] "Radiological"
## [10] "Sabotage Equipment"
## [11] "Unknown"
## [12] "Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)"
## weapon has 12 levels
# The weapon labels are capitalised (e.g. "Biological", "Chemical"), so a
# lower-case literal can never match and the check would always come back
# empty. Compare case-insensitively so that an empty result is meaningful.
gtd %>%
  filter(tolower(weapon) == "nuclear")
## confirming nuclear has 0 entries
The variable weapon has 13 categories, but since nuclear has 0 entries R is registering it as 12 levels. This is expected since terrorists do not have readily available access to nuclear materials. Since the entry “Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)” is very long, we decided to change it to “Vehicle-NE” to make it easier for visualization.
# Shorten the long vehicle label in the weapon column only, rather than
# comparing every cell of the data frame against the label.
gtd <- gtd %>%
  mutate(
    weapon = recode(
      weapon,
      "Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)" = "Vehicle-NE"
    )
  )
## check the levels again
levels(factor(gtd$weapon))
## [1] "Biological" "Chemical" "Explosives"
## [4] "Fake Weapons" "Firearms" "Incendiary"
## [7] "Melee" "Other" "Radiological"
## [10] "Sabotage Equipment" "Unknown" "Vehicle-NE"
## entries successfully changed
# Count the distinct countries instead of listing all of them.
nlevels(factor(gtd$country))
## [1] 205
## country has 205 levels
We did not want to see every country listed individually, so we used nlevels() to get the total number of countries in the data set. The final, tidied data set is shown below.
# Print the tidied data set that the following sections work from.
gtd
# Commented-out call that would open gtd in the interactive viewer.
#view(gtd)
We wanted to see what years have experienced the highest number of terrorist attacks and if there is some global trend over time.
library(dplyr)
# Incidents per calendar year, busiest year first.
gtd_year <- count(gtd, year, sort = TRUE)
# Show the ten busiest years.
print(gtd_year, n = 10)
## # A tibble: 49 x 2
## year n
## <dbl> <int>
## 1 2014 16959
## 2 2015 15133
## 3 2016 14046
## 4 2013 12045
## 5 2017 11358
## 6 2018 9840
## 7 2012 8521
## 8 2019 8495
## 9 2011 5076
## 10 1992 5071
## # … with 39 more rows
## number of terrorist attacks internationally per year since 1970
## 2014, 2015, 2016 have the highest numbers
Based on the data table, 9 of the 10 years with the highest number of recorded terrorist incidents are in the 2010s. This makes sense given the Arab Spring, which began in 2010; the surge of U.S. and allied troops to Iraq and Afghanistan during the Obama administration; the rise of ISIS in the late 2000s; and the expansion of terrorist networks worldwide with the growth of the Internet and social media.
library(ggrepel)
# Scatter plot of incidents per year, worldwide.
# The year range in the title is read from the data so the label cannot drift
# from what is plotted (the yearly counts include 2019, which a fixed
# "1970-2018" label would leave out).
global_year_span <- paste0(min(gtd_year$year), "-", max(gtd_year$year))
ggplot(data = gtd_year, mapping = aes(x = year, y = n)) +
  geom_point() +
  labs(
    x = "Year",
    y = "Number of Terrorist Attacks",
    title = paste(
      "Number of Terrorist Attacks Internationally per Year",
      global_year_span
    )
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  )
# Correlation matrix of the two columns (year and n).
cor(gtd_year)
## year n
## year 1.0000000 0.6919961
## n 0.6919961 1.0000000
# Pearson correlation between calendar year and yearly incident count.
with(gtd_year, cor(year, n))
## [1] 0.6919961
## There is a moderate, positive correlation between number of attacks and the year.
# Simple linear regression of yearly incident count on calendar year.
lm_year <- lm(n ~ year, data = gtd_year)
lm_year
##
## Call:
## lm(formula = n ~ year, data = gtd_year)
##
## Coefficients:
## (Intercept) year
## -367004.3 186.1
# Full model summary: coefficients, standard errors and R-squared.
summary(lm_year)
##
## Call:
## lm(formula = n ~ year, data = gtd_year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4704 -2076 475 1165 9231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -367004.30 56472.83 -6.499 4.71e-08 ***
## year 186.06 28.31 6.572 3.65e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2889 on 47 degrees of freedom
## Multiple R-squared: 0.4789, Adjusted R-squared: 0.4678
## F-statistic: 43.19 on 1 and 47 DF, p-value: 3.654e-08
## number of terrorist attacks = -367004.3 + year(186.1)
## On average, for every one year increase, the number of terrorist attacks increases by 186.1
# Scatter plot of incidents per year with the fitted least-squares line.
# The year range in the title is read from the data so the label matches what
# is plotted (the yearly counts include 2019).
global_fit_year_span <- paste0(min(gtd_year$year), "-", max(gtd_year$year))
ggplot(data = gtd_year, mapping = aes(x = year, y = n)) +
  geom_point() +
  geom_smooth(method = "lm", col = "green", se = FALSE) +
  labs(
    x = "Year",
    y = "Number of Terrorist Attacks",
    title = paste(
      "Number of Terrorist Attacks Internationally per Year",
      global_fit_year_span
    )
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  )
## `geom_smooth()` using formula 'y ~ x'
We ran a linear regression model to check if there is relationship between year and the number of terrorism attacks that happen (n). We found that on average, the number of terrorist attacks increases by 186.1 each year. We also ran a correlation test and found that the correlation coefficient=0.6919961 so there is a moderate, positive correlation between year and the number of terrorism attacks. Based on the multiple r-squared=0.4789, our regression model explains 47.89% of the variation in the number of terrorism attacks that happen (n). This is appropriate because there are many factors that contribute to terrorism, and while there is a general trend upwards over time, time is not the best variable to measure terrorism attack rates.
We wanted to see what countries have experienced the highest number of terrorist attacks.
# Incidents per country, most affected country first.
gtd_country <- gtd %>%
  count(country, sort = TRUE)
# Show the ten most affected countries. The row limit must be passed by name:
# a bare print(10) hands 10 to a different argument of the tibble print
# method (ignored in older tibble versions, taken as the width in newer ones).
gtd_country %>%
  print(n = 10)
## # A tibble: 205 x 2
## country n
## <chr> <int>
## 1 Iraq 26755
## 2 Afghanistan 16313
## 3 Pakistan 15208
## 4 India 13477
## 5 Colombia 8742
## 6 Philippines 7976
## 7 Peru 6109
## 8 Yemen 5526
## 9 United Kingdom 5424
## 10 El Salvador 5320
## # … with 195 more rows
## number of terrorist attacks per country since 1970
## Iraq, Afghanistan, Pakistan, and India have the highest numbers
# Bar chart of incident counts for every country. Axis labels and the legend
# are hidden because there are far too many countries to label.
# The year range in the title is read from the data (it includes 2019).
country_year_span <- paste0(
  min(gtd$year, na.rm = TRUE), "-", max(gtd$year, na.rm = TRUE)
)
ggplot(data = gtd_country) +
  geom_col(mapping = aes(x = country, y = n, fill = country)) +
  labs(
    x = "Country",
    y = "Number of Terrorist Attacks",
    title = paste("Number of Terrorist Attacks per Country", country_year_span)
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5),
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )
## too many countries
# Show the 20 most affected countries, reusing the table computed above
# instead of recounting.
gtd_country %>%
  print(n = 20)
## # A tibble: 205 x 2
## country n
## <chr> <int>
## 1 Iraq 26755
## 2 Afghanistan 16313
## 3 Pakistan 15208
## 4 India 13477
## 5 Colombia 8742
## 6 Philippines 7976
## 7 Peru 6109
## 8 Yemen 5526
## 9 United Kingdom 5424
## 10 El Salvador 5320
## 11 Nigeria 5070
## 12 Somalia 5037
## 13 Turkey 4464
## 14 Thailand 4162
## 15 Spain 3255
## 16 Sri Lanka 3040
## 17 United States 3004
## 18 Algeria 2749
## 19 Syria 2737
## 20 France 2726
## # … with 185 more rows
# Keep the 20 countries with the most incidents (ties at the cut-off are
# kept). slice_max() removes the need to hard-code the 20th country's count,
# which would silently select the wrong rows if the data were refreshed.
gtd_country20 <- gtd_country %>%
  slice_max(order_by = n, n = 20)
# The year range in the title is read from the data (it includes 2019).
top20_year_span <- paste0(
  min(gtd$year, na.rm = TRUE), "-", max(gtd$year, na.rm = TRUE)
)
ggplot(data = gtd_country20) +
  geom_col(mapping = aes(x = country, y = n, fill = country)) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  labs(
    x = "Country",
    y = "Number of Terrorist Attacks",
    title = paste0(
      "Number of Terrorist Attacks per Country ", top20_year_span, ", Top 20"
    )
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5),
    legend.position = "none"
  ) +
  # Print each count just below the top of its bar.
  geom_text(
    mapping = aes(x = country, y = n, label = n),
    vjust = 2, size = 3, color = "blue"
  )
## bar graph for only the top 20 countries
Based on the data table, 5 of the 10 countries with the highest number of recorded terrorist incidents (Iraq, Afghanistan, Pakistan, India, and Yemen) are in South Asia or the Middle East. Also, 3 of the 10 (Colombia, Peru, and El Salvador) are in Latin America. This makes sense because both regions have experienced high rates of violence, failed regimes, and political instability. For the graphs, we decided to look only at the 20 countries with the highest counts of terrorism attacks because plotting all 205 countries was not useful. Note that the U.S. appears in the top 20.
# Worldwide totals of people killed and wounded (missing counts are skipped).
gtd %>%
  summarize(
    CountKilled = sum(killed, na.rm = TRUE),
    CountWounded = sum(wounded, na.rm = TRUE)
  )
# The same totals broken down by country.
killedCountry <- gtd %>%
  group_by(country) %>%
  summarize(
    CountKilled = sum(killed, na.rm = TRUE),
    CountWounded = sum(wounded, na.rm = TRUE)
  )
killedCountry
# Countries with more than 10,000 people killed, deadliest first.
killedCountry2 <- killedCountry %>%
  filter(CountKilled > 10000) %>%
  arrange(desc(CountKilled))
killedCountry2
# The year range in the title is read from the data (it includes 2019).
killed_year_span <- paste0(
  min(gtd$year, na.rm = TRUE), "-", max(gtd$year, na.rm = TRUE)
)
ggplot(data = killedCountry2) +
  geom_col(mapping = aes(x = CountKilled, y = country, fill = country)) +
  labs(
    x = "No. of Killing since 1970",
    y = "Country",
    title = paste0(
      "Number of Killings per Country ", killed_year_span,
      ", Over 10000 killings"
    )
  ) +
  # Same title styling as the other charts; size = 3 with hjust = 2 rendered
  # the title tiny and pushed it off the panel.
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  ) +
  geom_text(
    mapping = aes(x = CountKilled, y = country, label = CountKilled),
    vjust = 0.2, size = 3, color = "black"
  )
The bar chart above shows the number of people killed in terrorist attacks since 1970 for each country with more than 10,000 deaths. Evidently, Iraq, Afghanistan, Pakistan, and India have the most deaths because of terrorism. It will be interesting to study how the number of killings has changed over the years since 1970 for each of these countries.
We wanted to see the trend of terrorist attacks in the U.S. over the given time period. The worldwide totals computed above give the total number of people killed and the total number of people wounded in terrorist attacks from 1970 onward: more than 456,249 killed and 549,743 wounded.
# Keep only incidents in the United States and drop the country and region
# columns.
gtdus <- gtd %>%
  filter(country == "United States") %>%
  select(-c(country, region))
gtdus
## gtdus is the data for the terrorist attacks in the U.S. only
dim(gtdus)
## [1] 3004 14
## 3004 rows and 14 columns
Filtered the data specific to the United States.
# U.S. incidents per calendar year, busiest year first.
gtdus_year <- gtdus %>%
  count(year, sort = TRUE)
# Show the ten busiest years. The row limit must be passed by name: a bare
# print(10) hands 10 to a different argument of the tibble print method.
gtdus_year %>%
  print(n = 10)
## # A tibble: 49 x 2
## year n
## <dbl> <int>
## 1 1970 468
## 2 1971 247
## 3 1975 149
## 4 1977 130
## 5 1976 105
## 6 1974 94
## 7 1978 87
## 8 1982 77
## 9 2018 75
## 10 1981 74
## # … with 39 more rows
## number of terrorist attacks per year since 1970
## 1970, 1971, 1975 have the highest numbers
# Scatter plot of U.S. incidents per year.
# The year range in the title is read from the data instead of being
# hard-coded, so the label always matches the years actually plotted.
us_year_span <- paste0(min(gtdus_year$year), "-", max(gtdus_year$year))
ggplot(data = gtdus_year, mapping = aes(x = year, y = n)) +
  geom_point(color = "red") +
  labs(
    x = "Year",
    y = "Number of Terrorist Attacks",
    title = paste(
      "Number of Terrorist Attacks in the U.S. per year", us_year_span
    )
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  )
# Correlation matrix of the two columns (year and n).
cor(gtdus_year)
## year n
## year 1.0000000 -0.5129769
## n -0.5129769 1.0000000
# Pearson correlation between calendar year and yearly U.S. incident count.
with(gtdus_year, cor(year, n))
## [1] -0.5129769
## There is a moderate, negative correlation between number of attacks and the year.
# Simple linear regression of yearly U.S. incident count on calendar year.
lm_usyear <- lm(n ~ year, data = gtdus_year)
lm_usyear
##
## Call:
## lm(formula = n ~ year, data = gtdus_year)
##
## Coefficients:
## (Intercept) year
## 5085.846 -2.519
# Full model summary: coefficients, standard errors and R-squared.
summary(lm_usyear)
##
## Call:
## lm(formula = n ~ year, data = gtdus_year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.55 -30.91 -13.09 2.70 344.90
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5085.8458 1226.4563 4.147 0.000140 ***
## year -2.5192 0.6149 -4.097 0.000164 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62.74 on 47 degrees of freedom
## Multiple R-squared: 0.2631, Adjusted R-squared: 0.2475
## F-statistic: 16.78 on 1 and 47 DF, p-value: 0.000164
## number of terrorist attacks = 5085.846 + year(-2.519)
## On average, for every one year increase, the number of terrorist attacks decreases by 2.519.
# Scatter plot of U.S. incidents per year with the fitted least-squares line.
# The year range in the title is read from the data instead of being
# hard-coded, so the label always matches the years actually plotted.
us_fit_year_span <- paste0(min(gtdus_year$year), "-", max(gtdus_year$year))
ggplot(data = gtdus_year, mapping = aes(x = year, y = n)) +
  geom_point(color = "red") +
  geom_smooth(method = "lm", col = "green", se = FALSE) +
  labs(
    x = "Year",
    y = "Number of Terrorist Attacks",
    title = paste(
      "Number of Terrorist Attacks in the U.S. per year", us_fit_year_span
    )
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  )
## `geom_smooth()` using formula 'y ~ x'
# Bar chart of U.S. incidents per year.
ggplot(data = gtdus_year, mapping = aes(x = year, y = n)) +
  geom_col(fill = "lightblue") +
  labs(
    x = "Year",
    y = "Number of Terrorist Attacks",
    title = "Number of Terrorist Attacks in the U.S. Per Year Since 1970"
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5),
    legend.position = "none"
  )
# Look up the U.S. records for 1993.
filter(gtdus, year == 1993)
## no recorded terrorist attacks in 1993
The data table shows there were 3004 terrorist attacks in the U.S. between 1970-2018. 1970 is the year with the highest number of terrorism attacks and the 1970s is the decade with highest number of terrorism attacks. We ran a linear regression model to check if there is relationship between year and the number of terrorism attacks that happen (n). We found that on average, the number of terrorist attacks decreases by 2.519. We also ran a correlation test and found that the correlation coefficient=-0.5129769 so there is a moderate, negative correlation between year and the number of terrorism attacks. Based on the multiple r-squared=0.2631, our regression model explains 26.31% of the variation in the number of terrorism attacks that happen (n). This is appropriate because American law enforcement and the intelligence community have made counterterrorism their priority since the increases in violence exhibited by extremist groups in the 1970s.
We wanted to see the trend of terrorist attacks in the U.S. over the given time period per state.
# U.S. incidents per state or territory, most affected first.
gtdus_provstate <- count(gtdus, provstate, sort = TRUE)
# Show the ten most affected states.
print(gtdus_provstate, n = 10)
## # A tibble: 54 x 2
## provstate n
## <chr> <int>
## 1 California 613
## 2 New York 536
## 3 Puerto Rico 248
## 4 Florida 168
## 5 Illinois 114
## 6 Washington 113
## 7 District of Columbia 86
## 8 Texas 83
## 9 Oregon 67
## 10 Massachusetts 61
## # … with 44 more rows
gtdus_provstate
## number of terrorist attacks per state since 1970
## California, New York, and Puerto Rico have the highest numbers
# Bar chart of incident counts for every state or territory.
ggplot(data = gtdus_provstate) +
  geom_col(mapping = aes(x = provstate, y = n, fill = provstate)) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  labs(
    x = "State",
    y = "Number of Terrorist Attacks",
    title = "Number of Terrorist Attacks in the U.S. by State Since 1970"
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5),
    legend.position = "none"
  )
## too many states
# Keep the 10 states with the most incidents (ties at the cut-off are kept).
# slice_max() removes the need to hard-code the 10th state's count.
gtdus_provstate10 <- gtdus_provstate %>%
  slice_max(order_by = n, n = 10)
library(ggrepel)
gtdus_provstate10
ggplot(data = gtdus_provstate10) +
  geom_col(mapping = aes(x = provstate, y = n, fill = provstate)) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  labs(
    x = "State",
    y = "Number of Terrorist Attacks",
    title = "Number of Terrorist Attacks in the U.S. by State Since 1970, Top 10"
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5),
    legend.position = "none"
  ) +
  # Label each bar with its count.
  geom_label_repel(
    mapping = aes(x = provstate, y = n, label = n),
    vjust = 1, size = 3, color = "black", position = "stack", stat = "identity"
  )
Based on the data table and graphs, California and New York have the highest rates of terrorism attacks from 1970-2018. Texas, Florida, and D.C. also have high rates of terrorist incidents. That makes sense because these are states of high populations and have political significance. However, the high rate of terrorism attacks in Puerto Rico does not make sense.
# Total killed and wounded in U.S. incidents (missing counts are skipped).
killedus <- gtd %>%
  filter(country == "United States") %>%
  summarize(
    CountKilled = sum(killed, na.rm = TRUE),
    CountWounded = sum(wounded, na.rm = TRUE)
  )
killedus
# The same totals broken down by state or territory.
killedus2 <- gtd %>%
  filter(country == "United States") %>%
  group_by(provstate) %>%
  summarize(
    CountKilled = sum(killed, na.rm = TRUE),
    CountWounded = sum(wounded, na.rm = TRUE)
  )
killedus2
# States with more than 20 people killed.
# NOTE(review): the chart title says "Top 10" but the selection is a
# threshold; confirm that it yields exactly ten states.
killedus50 <- killedus2 %>%
  filter(CountKilled > 20)
library(ggrepel)
# Dot plot of fatalities per state with the count printed next to each point.
# Every layer must be joined with `+`: without it after theme(), the label
# layer was evaluated on its own and printed to the console instead of being
# drawn. The fill mapping was dropped because it has no visible effect on the
# default point shape.
ggplot(data = killedus50, mapping = aes(x = CountKilled, y = provstate)) +
  geom_point() +
  geom_text_repel(mapping = aes(label = CountKilled)) +
  labs(
    x = "Number Killed",
    y = "State",
    title = "Fatalities in the U.S. by State Since 1970, Top 10"
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  )
The data above show the top 10 states by total number of confirmed fatalities. New York has the most fatalities and is clearly an outlier, with over 2,800. The 9/11 attacks are a case in point: over 2,000 civilians lost their lives in that incident.
# U.S. incidents per target type, most targeted first.
target1 <- gtdus %>%
  count(target, sort = TRUE)
target1
# First look: target types in default (alphabetical) order.
ggplot(data = target1) +
  geom_point(mapping = aes(x = n, y = target))
# Same plot with target types ordered by incident count.
# The colour is a fixed setting, so it belongs outside aes(): inside aes() the
# string "red" is treated as a data value and drawn in the default palette
# colour rather than in red.
ggplot(data = target1) +
  geom_point(
    mapping = aes(x = n, y = fct_reorder(target, n)),
    color = "red"
  ) +
  labs(
    x = "Number of Terrorist Attacks",
    y = "Target Groups",
    title = "Number of Terrorist Attacks on Target Groups of US"
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5),
    legend.position = "none"
  )
Based on the data table and graphs, Business, Private Citizens & Property, and G-General are the groups most often targeted by terrorists in the United States, with Business the highest at more than 800 attacks. Large-scale attacks, most infamously the attacks on the World Trade Center on Sept. 11, 2001, destroyed billions of dollars' worth of property, senselessly killed thousands of productive workers, and increased uncertainty in the markets.
# Worldwide incidents per attack type, most frequent first.
gtd_attack <- count(gtd, attack, sort = TRUE)
gtd_attack
## The most frequent types of attacks used for a terrorist purpose are bombings/explosions and armed assaults.
# Copy of gtd with attack as a factor ordered by frequency, so the bars
# appear from most to least common.
gssm <- mutate(gtd, attack = fct_infreq(attack))
gssm
library(ggrepel)
ggplot(data = gssm, mapping = aes(x = attack, fill = attack)) +
  geom_bar() +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  labs(
    x = "Type of Attack",
    y = "Number of Terrorist Attacks",
    title = "Terrorist Attacks Globally by Attack Type"
  ) +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5),
    legend.position = "none"
  )
## percentages
# Single stacked bar of incident counts by attack type.
bpattack <- ggplot(gtd_attack, aes(x = "", y = n, fill = attack)) +
  geom_col(width = 0.5)
bpattack
## Stacked Bar Chart
# Percentage share of each attack type. The counts are taken from gtd_attack
# rather than typed in by hand, so the labels stay aligned with the rows
# being plotted if the data change.
a <- gtd_attack$n
percattack <- round(100 * a / sum(a), 2)
percattack
## [1] 47.42 23.57 10.39 6.56 5.78 4.77 0.59 0.56 0.36
## percentage labels
# Pie chart: the stacked bar in polar coordinates, labelled with percentages.
# theme_void() is a complete theme, so it must come before the title styling;
# added afterwards it would replace the theme and discard that styling.
ggplot(gtd_attack, aes(x = "", y = n, fill = attack)) +
  geom_col(width = 1) +
  coord_polar("y") +
  geom_text(
    aes(label = paste0(percattack, "%")),
    position = position_stack(vjust = 0.5)
  ) +
  ggtitle("Terrorist Attacks Globally by Attack Type") +
  theme_void() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  )
From the graphs above, terrorists have most often used ‘Bombing’ and ‘Armed Assault’ as their type of attack globally. As future research, it would be interesting to study how and from where terrorist groups are funded.
# Worldwide incidents per weapon type, most frequent first.
gtd_weapon <- gtd %>%
  count(weapon, sort = TRUE)
gtd_weapon
# Single stacked bar of incident counts by weapon type.
bpweapon <- ggplot(gtd_weapon, aes(x = "", y = n, fill = weapon)) +
  geom_col(width = 0.5)
bpweapon
## Stacked Bar Chart
# Percentage share of each weapon type. The counts are taken from gtd_weapon
# rather than typed in by hand, so the labels stay aligned with the rows
# being plotted if the data change.
b <- gtd_weapon$n
perweapon <- round(100 * b / sum(b), 2)
perweapon
## [1] 49.87 32.27 9.18 6.19 2.05 0.17 0.09 0.08 0.07 0.02 0.02 0.01
## percentage labels
# Pie chart: the stacked bar in polar coordinates, labelled with percentages.
# theme_void() is a complete theme, so it must come before the title styling;
# added afterwards it would replace the theme and discard that styling.
ggplot(gtd_weapon, aes(x = "", y = n, fill = weapon)) +
  geom_col(width = 1) +
  coord_polar("y") +
  geom_text(
    aes(label = paste0(perweapon, "%")),
    position = position_stack(vjust = 0.4)
  ) +
  ggtitle("Terrorist Attacks Globally by Weapon Type") +
  theme_void() +
  theme(
    plot.title = element_text(color = "black", size = 14, face = "bold", hjust = 0.5)
  )
The stacked bar graph and pie chart suggest that terrorists have most often used ‘Explosives’ and ‘Firearms’ as their weapon type to cause destruction.